********************************************************************************************************
** This do.file estimates CES elasticities
** following Feenstra (1994) and Broda and Weinstein (2006)
** [3] Households in top income quintile only
********************************************************************************************************

global db "D:\Dropbox\unequal_gains\main_data"
cd "D:\Dropbox\unequal_gains\QJE revision plan\analysis"
global Appendixresultspath "D:\Dropbox\unequal_gains\QJE revision plan\analysis\clean_results\appendix"

*********************************************************************
****** HOUSEHOLDS IN TOP INCOME QUINTILES ************************
*********************************************************************

****************************
*** PART 0: Data Building
****************************

foreach i of numlist 2004(1)2015 {
use "$db/Important Datasets/purchases_`i'.dta", clear
merge m:1 trip_code_uc using "$db/Important Datasets/trips_`i'.dta", keepusing(trip_code_uc household_code purchase_date)
keep if _merge==3
drop _merge

gen year=real(substr(purchase_date,1,4))
gen month=real(substr(purchase_date,6,2))
* there are a few observations from december of the previous year - get rid of this
keep if year==`i'

gen quarter=1 if month==1 | month==2 | month==3 
replace quarter=2 if month==4 | month==5 | month==6 
replace quarter=3 if month==7 | month==8 | month==9 
replace quarter=4 if month==10 | month==11 | month==12 

* impose the same restrictions as Broad and Weinstein in their 2010 paper
drop if quantity<=0
drop if total_price_paid-coupon_value<=0

collapse (sum) quantity total_price_paid coupon_value, by(upc upc_ver_uc household_code year quarter) fast
merge m:1 household_code using "$db/Important Datasets/panelists_`i'.dta", keepusing(projection_factor household_code household_income)
keep if _merge==3
drop _merge

* FOCUS ON BOTTOM INCOME QUINTILE
drop if missing(household_income)
keep if household_income>26

* impose same restriction as Borda Weinstein 2010 in terms of number of buyers per upc-quarter:
bysort upc upc_ver_uc year quarter: gen rawnb=[_N]
*sum rawnb, d
* about 26% of upcs have less than 20 buyers per year
drop if rawnb<20

* compute the variance in average price across consumer per quarter so that 
* we can get rid of UPCS with too high CVs if needed
gen consumer_average_price=(total_price_paid-coupon_value)/quantity
bysort upc upc_ver_uc year quarter: egen market_average_price=sum(consumer_average_price)
replace market_average_price=market_average_price/rawnb
gen temp=(consumer_average_price-market_average_price)^2
bysort upc upc_ver_uc year quarter: egen sd_consumer_average_price=sum(temp)
drop temp
replace sd_consumer_average_price=sqrt(sd_consumer_average_price/(rawnb-1))
drop if missing(sd_consumer_average_price)
gen cv_consumer_average_price=sd_consumer_average_price/market_average_price*100
sum cv, d
* let's drop observation above the 1%
drop if cv>r(p99)

* now keep info without weight and with projection weights
gen consumer_total_spending=total_price_paid-coupon_value
bysort upc upc_ver_uc year quarter: egen market_total_spending=sum(consumer_total_spending)

collapse  (mean) consumer_average_price market_average_price market_total_spending rawnb cv_consumer_average_price (sum) consumer_total_spending  [fw=projection_factor], by(upc upc_ver_uc year quarter) fast
rename consumer_average_price market_average_price_w
rename consumer_total_spending market_total_spending_w

* now bring in the info on the upc classification (module, etc.)
merge m:1 upc upc_ver_uc using "$db/Important Lists/products_short_final.dta"
keep if _merge==3
drop _merge
save "$db/Important Datasets/elasticity_estimation_quarterly_`i'_topQ.dta", replace
}

******************
*** PART 1: OLS
******************

* A. Prepare data

* load
use "$db/Important Datasets/elasticity_estimation_quarterly_2004_topQ.dta", clear
foreach i of numlist 2005(1)2015 {
append using "$db/Important Datasets/elasticity_estimation_quarterly_`i'_topQ.dta"
}

* make sure all upcs are in the same module over time
egen upc_id=group(upc upc_ver_uc)
sort upc_id year quarter
by upc_id: egen medmod=median(product_module_code)
replace product_module_code = medmod
drop medmod
gen upc6=int(upc/1000000)

* calculate share variables in module*quarter
sort year quarter product_module_code upc6
egen mod_period=group(year quarter product_module_code)
bysort mod_period: egen Sumval=sum(market_total_spending)
gen share=market_total_spending/Sumval

* calculate dlnp and dlnShares at the UPC level
gen period=.
replace period=quarter if year==2004
foreach i of numlist 2005(1)2015 {
replace period=(`i'-2004)*4+quarter if year==`i'
}

sort upc_id period
tsset upc_id period
by upc_id: gen dlnp=ln(market_average_price) - ln(L.market_average_price) 
gen price_change_pc=dlnp*100
sum price_change_pc, d
hist price_change_pc if price_change_pc<r(p99) & price_change_pc>r(p1), bin(100)

sort upc_id period
by upc_id: gen dlnShare=ln(share) - ln(L.share) 
gen share_change_pc=dlnShare*100
sum share_change_pc, d
hist share_change_pc if share_change_pc<r(p99) & share_change_pc>r(p1), bin(100)
drop share_change_pc price_change_pc

* follow Broda and Weinstein to generate weights and constant
by upc_id: egen T=count(period)
by upc_id: gen temp=1/rawnb+1/L.rawnb
by upc_id: egen sumt=sum(temp)
gen constant=sumt/T

* winsorize (important to drop missing obs otherwise we impute large values for the missing obs)
drop if missing(dlnp) | missing(dlnShare) 
foreach i in dlnp dlnShare{
sum `i', d
replace `i'=r(p99) if `i'>r(p99)
replace `i'=r(p1) if `i'<r(p1)
}

* choose a upc per module-period to difference the data
sort upc
by upc: egen avgshare=mean(share)

gsort product_module period -T -avgshare

by product_module period: gen dlnsharek = dlnShare[1]
by product_module period: gen dlnpk = dlnp[1]

* eliminate irrelevant entries (upcs for which we did the differencing)
by product_module period: drop if _n==1

* create relevant variables
gen Ddlnp = dlnp - dlnpk 
gen D2dlnp = Ddlnp^2 
drop dlnp 
gen Ddlnshare = dlnShare - dlnsharek 
gen D2dlnshare = Ddlnshare^2 
drop dlnShare avgshare
gen DdlnpDdlnshare = Ddlnp*Ddlnshare 

* finalize dataset
keep product_module_code period upc_id sumt D* T constant
collapse product_module_code D* T sumt constant, by(upc)

* create weighted variables as in Broda-Weinstein (2010) 
gen weight=T^3/sumt
foreach var in D2dlnp constant D2dlnshare DdlnpDdlnshare {
gen w`var' = sqrt(weight)*`var'
}
save "$db/Important Datasets/elasticity_estimation_quarterly_topQ_final.dta", replace

* B. Product-module-specific regressions

* initiate dataset to store elasticity estimates
clear
set obs 1 
foreach i in sigma sigma_se product_module_code_id product_module_code theta1 theta2 {
gen `i'=.
}
save "$db/Important Datasets/estimated_elasticities_topQ_final.dta", replace

use "$db/Important Datasets/elasticity_estimation_quarterly_topQ_final.dta", clear
merge m:1 product_module_code using "$db/Important Lists/product_module_code_id.dta"
keep if _merge==3
drop _merge
* drop the modules for which we have less than four observations (otherwise we get perfect fit)
bysort product_module_code: gen tot=[_N]
keep if tot>4
drop tot
save "$db/Important Datasets/elasticity_estimation_quarterly_topQ_final.dta", replace
keep product_module_code
duplicates drop 
describe

foreach i of numlist 1(1)1036 {
estimates clear
use "$db/Important Datasets/elasticity_estimation_quarterly_topQ_final.dta", clear

* keep just one module
capture noisily keep if product_module_code_id==`i'

* winsorize data within module
foreach var in wD2dlnp wD2dlnshare wDdlnpDdlnshare {
capture noisily sum `var', d
capture noisily gen p99_`var'=r(p99)
capture noisily gen p1_`var'=r(p1)
capture noisily replace `var'=p99_`var' if `var'>p99_`var'
capture noisily replace `var'=p1_`var' if `var'<p1_`var'
}

* run regression according to model and obtain standard errors by using the lncom command 
capture noisily rename wD2dlnshare x1
capture noisily rename wDdlnpDdlnshare x2
capture noisily reg wD2dlnp wconstant x1 x2, r
capture noisily gen theta1=_b[x1]
capture noisily gen theta2=_b[x2]

capture noisily if _b[x1] > 0 {
	capture noisily if _b[x2]>0 {
		capture noisily nlcom 1+(2* (1/2 + (1/4 - 1/(4+(_b[x2]^2/_b[x1])))^(1/2)) -1)/(1 + (1/2 - (1/4 - 1/(4+(_b[x2]^2/_b[x1])))^(1/2))) * (1/_b[x2]), post
		}
	capture noisily else {
		capture noisily nlcom 1+(2* (1/2 - (1/4 - 1/(4+(_b[x2]^2/_b[x1])))^(1/2)) -1)/(1 - (1/2 - (1/4 - 1/(4+(_b[x2]^2/_b[x1])))^(1/2))) * (1/_b[x2]), post
	}
capture noisily gen sigma=_b[_nl_1]
capture noisily gen sigma_se=_se[_nl_1]
capture noisily keep sigma sigma_se product_module_code_id product_module_code theta1 theta2
capture noisily drop if missing(sigma)
capture noisily duplicates drop
capture noisily append using "$db/Important Datasets/estimated_elasticities_topQ_final.dta"
capture noisily save "$db/Important Datasets/estimated_elasticities_topQ_final.dta", replace
}

capture noisily if theta1 < 0 {
capture noisily keep theta1 theta2 product_module_code_id product_module_code
capture noisily duplicates drop
capture noisily append using "$db/Important Datasets/estimated_elasticities_topQ_final.dta"
capture noisily save "$db/Important Datasets/estimated_elasticities_topQ_final.dta", replace
}

display "done with module `i'"
}
use "$db/Important Datasets/estimated_elasticities_topQ_final.dta", clear
keep sigma sigma_se product_module_code_id product_module_code theta1 theta2
duplicates drop
* drop first entry we use to set up the dataset
drop if missing(theta1)
sum sigma, d
sum sigma if sigma/sigma_se>1.96, d
gen implied_markup=sigma/(sigma-1)
save "$db/Important Datasets/estimated_elasticities_topQ_final_ols.dta", replace

**************************
*** PART 2: GRID SEARCH
**************************

clear
set obs 53
* generate sigma between 1.5 and 19 (based on our results above)
gen sigma=.
replace sigma=1.5 if [_n]==1
* then increase by 5% increments:
replace sigma=sigma[_n-1]+0.05*sigma[_n-1] if [_n]>1 
tab sigma
* now use this list as grid points to write the loop below

* set up the file that will store the results
clear
set obs 1
gen product_module_code=.
save elasticities_grid_results_topQ, replace

* ii) Running loop
foreach sigma in 1.5 1.575 1.65375 1.736438 1.823259 1.914422 ///
2.010144 2.110651 2.216183 2.326993 2.443342 2.565509 2.693785 ///
2.828474 2.969898 3.118393 3.274312 3.438028 3.609929 3.790426 ///
3.979947 4.178945 4.387892 4.607286 4.837651 5.079533 5.33351 ///
5.600185 5.880195 6.174204 6.482914 6.80706 7.147413 7.504784 ///
7.880023 8.274025 8.687726 9.122112 9.578218 10.05713 10.55998 ///
11.08798 11.64238 12.2245 12.83573 13.47751 14.15139 14.85896 ///
15.60191 16.382 17.2011 18.06116 18.96421 {

foreach rho in 0 0.05 0.1 0.15 0.2 0.25 0.3 0.35 0.4 0.45 0.5 0.55 0.6 0.65 0.7 ///
0.75 0.8 0.85 0.9 0.95 {

use "$db/Important Datasets/elasticity_estimation_quarterly_topQ_final.dta", clear

gen theta1=`rho'/( (`sigma'-1)^2*(1-`rho') )
gen theta2=(2*`rho'-1)/( (`sigma'-1)*(1-`rho') )

rename wD2dlnshare x1
rename wDdlnpDdlnshare x2
gen epsilon=wD2dlnp - theta1*x1 - theta2*x2
bysort product_module_code: egen epsilonbar=mean(epsilon) 
gen epsilon_sq=epsilon^2
gen epsilon_minusbar_sq=(epsilon-epsilonbar)^2

collapse (sum) epsilon_sq epsilon_minusbar_sq, by(product_module_code) fast 
gen sigma=`sigma'
gen rho=`rho'
append using elasticities_grid_results_topQ
drop if missing(product_module_code)
save elasticities_grid_results_topQ, replace
display "done with sigma `sigma' rho `rho'"
}
}

* Store results (without a constant)
use elasticities_grid_results_topQ, clear
collapse (min) epsilon_sq, by(product_module_code) fast
merge 1:m product_module_code epsilon_sq using elasticities_grid_results_topQ
keep if _merge==3
drop _merge
sum sigma, d
save sigma_noconstant_product_module_topQ, replace

* Consolidate with OLS estimates
rename sigma sigma_grid
merge 1:1 product_module_code using "$db/Important Datasets/estimated_elasticities_topQ_final_ols.dta"
drop _merge
gen grid_estimate=0
replace grid_estimate=1 if missing(sigma)
replace sigma=sigma_grid if missing(sigma)
keep product_module_code sigma grid_estimate sigma_se
rename sigma sigma_topQ
rename grid_estimate grid_estimate_topQ
rename sigma_se sigma_se_topQ

* Bring to main sigma dataset and impute at dept/overall level
merge 1:1 product_module_code using "$db/Important Price Datasets/sigmas"
gen imputed_topQ=(_merge==2)
drop _merge
bysort department: egen sigma_avg=median(sigma_topQ)
replace sigma_topQ=sigma_avg if missing(sigma_topQ)
drop sigma_avg
egen sigma_avg=median(sigma_topQ)
replace sigma_topQ=sigma_avg if missing(sigma_topQ)

keep product_module_code product_group_code department_code *_common *_topQ *_bottomQ
save "$db/Important Price Datasets/sigmas", replace

* Document distribution of elasticities for top vs. bottom:
use "$db/Important Price Datasets/sigmas", clear
twoway (histogram sigma_bottomQ  if sigma_bottomQ<15, bin(100)) ///
(histogram sigma_topQ if sigma_top<15, fcolor(none) lcolor(black) bin(100)), ///
legend(order(1 "Bottom Income Quintile" 2 "Top Income Quintile" ) pos(12) ring(0) region(lwidth(none))) graphregion(color(white)) ///
xtitle("Elasticity of Substitution (within Product Module)") 
graph export "$Appendixresultspath/AppendixElasticities.pdf", as(pdf) replace
graph save "$Appendixresultspath/AppendixElasticities.gph", replace

